/*
 * Copyright (c) 2020 Huawei Technologies Co.,Ltd.
 *
 * openGauss is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 * -------------------------------------------------------------------------
 *
 * parquet_column_reader.h
 *
 * IDENTIFICATION
 *    src/gausskernel/storage/access/dfs/parquet/parquet_column_reader.h
 *
 * -------------------------------------------------------------------------
 */

#ifndef PQRQUET_COLUMN_READER_H
#define PQRQUET_COLUMN_READER_H

#ifndef ENABLE_LITE_MODE
#include "parquet/api/reader.h"
#endif

#include "utils/date.h"
#include "utils/timestamp.h"
#include "utils/cash.h"
#include "utils/dfs_vector.h"
#include "vecexecutor/vectorbatch.h"
#include "access/dfs/dfs_stream.h"
#include "access/dfs/dfs_am.h"

namespace dfs {
/* Length of one day in successively finer units; each is derived from the previous one. */
constexpr int64_t SECONDS_PER_DAY = INT64_C(24) * 60 * 60;
constexpr int64_t MILLISECONDS_PER_DAY = INT64_C(1000) * SECONDS_PER_DAY;
constexpr int64_t MICROSECONDS_PER_DAY = INT64_C(1000) * MILLISECONDS_PER_DAY;
constexpr int64_t NANOSECONDS_PER_DAY = INT64_C(1000) * MICROSECONDS_PER_DAY;

/* Day count subtracted from the day field of an INT96 value in Int96GetNanoSeconds. */
constexpr int64_t JULIAN_TO_UNIX_EPOCH_DAYS = INT64_C(2440588);

/* The number of nanoseconds in one microsecond. */
constexpr int64_t NANOSECONDS_PER_MICROSECOND = INT64_C(1000);

/* The difference, in days, between the day bases of parquet and pg (1970.1.1 ~ 2000.1.1). */
constexpr int64_t PARQUET_PSQL_EPOCH_IN_DAYS = INT64_C(10957);

/* Eight hours expressed in nanoseconds; added to the value in nanoSecondsToPsqlTimestamp. */
constexpr int64_t epochOffsetDiff = 8 * (NANOSECONDS_PER_DAY / 24);

typedef Datum (*convertToDatum)(void *, uint64, parquet::Type::type, int32, int32_t, bool &, int32, int32, bool &);

/*
 * Flatten a parquet INT96 value into a single nanosecond count.
 * The first eight bytes of i96.value are copied out as a nanosecond part,
 * and value[2] is treated as a day number that is rebased by
 * JULIAN_TO_UNIX_EPOCH_DAYS before being scaled to nanoseconds.
 * @_in_param i96: The INT96 value to convert.
 * @return The day part (in nanoseconds) plus the nanosecond part.
 */
inline int64_t Int96GetNanoSeconds(const parquet::Int96 &i96)
{
    /* Copy rather than cast: the eight bytes span two 32-bit array slots. */
    int64_t nanosPart = 0;
    errno_t rc = memcpy_s(&nanosPart, sizeof(nanosPart), i96.value, sizeof(nanosPart));
    securec_check(rc, "\0", "\0");

    const int64_t rebasedDays = i96.value[2] - JULIAN_TO_UNIX_EPOCH_DAYS;
    return rebasedDays * NANOSECONDS_PER_DAY + nanosPart;
}

/*
 * Turn a nanosecond count into a Timestamp.
 * The value is moved back by PARQUET_PSQL_EPOCH_IN_DAYS days, moved forward
 * by epochOffsetDiff, and finally reduced from nanoseconds to microseconds.
 * @_in_param nanoSeconds: The nanosecond count to convert.
 * @return The resulting value cast to Timestamp.
 */
inline Timestamp nanoSecondsToPsqlTimestamp(int64_t nanoSeconds)
{
    const int64_t epochShift = PARQUET_PSQL_EPOCH_IN_DAYS * NANOSECONDS_PER_DAY;
    const int64_t adjusted = (nanoSeconds - epochShift) + epochOffsetDiff;
    return static_cast<Timestamp>(adjusted / NANOSECONDS_PER_MICROSECOND);
}

namespace reader {
/*
 * The level of a PARQUET file at which a restriction applies.
 * Row group is the only level defined here.
 */
enum RestrictionType {
    ROW_GROUP = 1
};

/*
 * Abstract interface of a reader for one column of a PARQUET file.
 * Every operation is pure virtual, so the class itself cannot be instantiated;
 * createParquetColumnReader() is declared in this header to hand out instances.
 */
class ParquetColumnReader : public BaseObject {
public:
    ParquetColumnReader() = default;

    virtual ~ParquetColumnReader() = default;

    /*
     * Hand the reader its input stream and the metadata of the file.
     * @_in_param gsInputStream: The input stream; it is passed by value as a
     *      std::unique_ptr, so the caller gives up ownership.
     * @_in_param fileMetaData: The metadata of the PARQUET file.
     */
    virtual void begin(std::unique_ptr<GSInputStream> gsInputStream,
                       const std::shared_ptr<parquet::FileMetaData> &fileMetaData) = 0;

    /*
     * Tear down the reader.
     * NOTE(review): what exactly is released is decided by the implementation.
     */
    virtual void Destroy() = 0;

    /*
     * Set the index of the row group for the reader.
     * @_in_param rowGroupIndex: The index of the row group.
     */
    virtual void setRowGroupIndex(const uint64_t rowGroupIndex) = 0;

    /*
     * Skip a special number of rows in one column of PARQUET file.
     * @_in_param numValues: The number of rows to skip.
     */
    virtual void skip(const uint64_t numValues) = 0;

    /*
     * Move the reader forward over the next values of the column.
     * NOTE(review): presumably this loads the values that predicateFilter()
     * and fillScalarVector() work on afterwards -- confirm in the implementation.
     * @_in_param numValuesToRead: The number of values to read.
     */
    virtual void nextInternal(const uint64_t numValuesToRead) = 0;

    /*
     * Set the bloom filter in the column reader.
     * @_in_param bloomFilter: The bloom filter to be set.
     */
    virtual void setBloomFilter(filter::BloomFilter *bloomFilter) = 0;

    /*
     * Check if the current column of PARQUET file has predicates.
     * @return true: The predicate of the column exists;
     *      false: The predicate of the column does not exist;
     */
    virtual bool hasPredicate() const = 0;

    /*
     * Filter the obtained data with the predicates on the column
     * and set isSelected flags.
     * @_in_param numValues: The number of rows to be filtered.
     * @_in_out_param isSelected: The flag array to indicate which
     *      row is selected or not.
     */
    virtual void predicateFilter(uint64_t numValues, bool *isSelected) = 0;

    /*
     * Fill a scalar vector from the column.
     * @_in_param numRowsToRead: The number of rows to read.
     * @_in_param isSelected: The flag array to indicate which
     *      row is selected or not; it is not modified here.
     * @_out_param scalorVector: The vector to be filled.
     * @return NOTE(review): the meaning of the result is not visible in this
     *      header -- presumably a row count; confirm in the implementation.
     */
    virtual int fillScalarVector(uint64_t numRowsToRead, const bool *isSelected, ScalarVector *scalorVector) = 0;

    /*
     * Check if the equal op restrict(we generate a bloom filter) matches
     * the bloomfilter of the parquet file.
     * @_in_param index: The index of the stride as the bloom filter
     *      is created for each stride in PARQUET file.
     * @return true only if the two bloom filters both exist and match while
     *      one stores in file and the other is generated by the restrict.
     *      Otherwise return false.
     */
    virtual bool checkBloomFilter(uint64_t index) const = 0;

    /*
     * Build the restriction of the column for the given restriction type.
     * @_in_param type: The restriction type, see RestrictionType.
     * @_in_param fileReader: The reader of the PARQUET file.
     * @_in_param rowGroupIndex: The index of the row group.
     * @return The restriction as a Node pointer.
     */
    virtual Node *buildColRestriction(RestrictionType type, parquet::ParquetFileReader *fileReader,
                                      uint64_t rowGroupIndex) const = 0;
};

/*
 * Create a column reader for one column of a PARQUET file.
 * Only the declaration lives in this header; the definition is elsewhere.
 * @_in_param desc: The parquet descriptor of the column.
 * @_in_param columnIndex: The index of the column.
 *      NOTE(review): presumably the index inside the PARQUET file -- confirm.
 * @_in_param mppColumnIndex: A second column index.
 *      NOTE(review): presumably the index on the table side -- confirm.
 * @_in_param readerState: The state of the reader.
 * @_in_param var: The Var that belongs to the column.
 * @return A pointer to the created ParquetColumnReader.
 */
ParquetColumnReader *createParquetColumnReader(const parquet::ColumnDescriptor *desc, uint32_t columnIndex,
                                               uint32_t mppColumnIndex, ReaderState *readerState, const Var *var);
}  // namespace reader
}  // namespace dfs
#endif
